In [479]:
#imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import seaborn as sns
from scipy import stats
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import Ridge
#jupyter nbconvert --to html /content/filename.ipynb
In [ ]:
#data processing
#csv to pandas
exoDf = pd.read_csv('ExoplanetData.csv')
# Reduce 'hd_name' to its catalogue number: the first run of digits is kept,
# which drops the redundant "HD" prefix and any trailing component letter
# (e.g. "A"). A raw string is used so that \d is a regex token rather than an
# invalid string escape. Rows without any digits become missing values, and
# the nullable 'Int64' dtype lets those stay missing while the rest are ints.
exoDf['hd_name'] = pd.to_numeric(
    exoDf['hd_name'].str.extract(r'(\d+)', expand=False),
    errors='coerce',
).astype('Int64')
In [ ]:
# Load the HD catalogue table; the file uses ';' as its field separator.
hdDf = pd.read_csv('HDCatalogue.csv', delimiter=';')
In [76]:
# The raw exoplanet table has 121 columns; trim it to a workable subset.
# Kept: names/identifiers plus the quantities that can feed a regression.
# Dropped: flags, discovery methods, and similar bookkeeping columns.
# Intended use: a model that predicts the number of planets from exoDf,
# and possibly a 3d plot of all observed exoplanets.
columns_to_keep = [
    # names and catalogue identifiers
    'pl_name', 'hostname', 'hd_name', 'hip_name', 'tic_id', 'gaia_id',
    # sy_* count columns
    'sy_snum', 'sy_pnum', 'sy_mnum',
    # pl_* columns
    'pl_orbper', 'pl_orbsmax', 'pl_rade', 'pl_masse', 'pl_dens',
    'pl_orbeccen',
    # st_* columns
    'st_spectype', 'st_teff', 'st_rad', 'st_mass', 'st_met', 'st_lum',
    'st_logg', 'st_age', 'st_dens',
    # sky position, then the remaining sy_* columns
    'ra', 'dec', 'sy_dist', 'sy_plx',
    'sy_bmag', 'sy_vmag', 'sy_kepmag',
]
# Label-based selection; raises KeyError if any listed column is missing.
exoDf = exoDf.loc[:, columns_to_keep]
In [172]:
#plot the HD catalogue stars and the exoplanet host systems together on an all-sky (Aitoff) map
#converts degree in range 0-360 to a radian in range -pi to pi
def deg_from_neg_pi_to_pi(deg):
    """Convert an angle in degrees to radians and shift it down by pi.

    An input of 0..360 degrees therefore maps onto -pi..pi, with 180 degrees
    landing on 0. The value is shifted, not wrapped, so inputs outside
    0..360 produce results outside -pi..pi.

    Parameters
    ----------
    deg : scalar or array-like (NumPy array, pandas Series, ...)
        Angle(s) in degrees.

    Returns
    -------
    Same shape as ``deg``: the angle(s) in radians minus pi.
    """
    return np.deg2rad(deg) - np.pi
#converts a degree in range -90 to 90 to a radian in range -pi/2 to pi/2
def deg_to_rad_90(deg):
    """Convert an angle in degrees to radians, with no shift or wrapping.

    An input of -90..90 degrees maps onto -pi/2..pi/2.

    Parameters
    ----------
    deg : scalar or array-like (NumPy array, pandas Series, ...)
        Angle(s) in degrees.

    Returns
    -------
    Same shape as ``deg``: the angle(s) in radians.
    """
    return np.deg2rad(deg)
# Convert both tables' sky positions from degrees to the radian ranges used
# on the Aitoff axes. The converters are plain arithmetic, so they are called
# once on each whole column instead of once per element through .apply().
raHd = deg_from_neg_pi_to_pi(hdDf['_RAJ2000'])
raExo = deg_from_neg_pi_to_pi(exoDf['ra'])
decHd = deg_to_rad_90(hdDf['_DEJ2000'])
decExo = deg_to_rad_90(exoDf['dec'])
# Set a higher DPI for better resolution
fig = plt.figure(figsize=(15, 10), dpi=200)
ax = fig.add_subplot(111, projection='aitoff')
# Initialize the plot with initial data: HD catalogue stars as a faint blue
# background, exoplanet rows drawn on top in red. The scatter handles are
# kept so the animation callback can move the points later.
sc_hd = ax.scatter(raHd, decHd, s=1, c='blue', alpha=0.1)
sc_exo = ax.scatter(raExo, decExo, s=1, c='red', alpha=0.3)
ax.set_xlabel('Right Ascension')
ax.set_ylabel('Declination')
ax.set_title('Systems with Confirmed Exoplanets')
ax.grid(True)
# Update function for animation
def _wrap_ra(ra, shift_radians):
    """Add ``shift_radians`` to ``ra`` and wrap the result into [-pi, pi)."""
    return ((ra + shift_radians + np.pi) % (2 * np.pi)) - np.pi


def update(frame):
    """Rotate both scatter layers in right ascension for one animation frame.

    Parameters
    ----------
    frame : number
        Rotation of the sky for this frame, in degrees.

    Returns
    -------
    tuple
        ``(sc_hd, sc_exo)``, the two scatter artists that were modified
        (the commented-out FuncAnimation call uses ``blit=True``).

    Notes
    -----
    Reads the module-level ``raHd``, ``raExo``, ``decHd``, ``decExo`` and
    updates ``sc_hd`` / ``sc_exo`` in place. No ``global`` statement is
    needed because none of those names is rebound here.
    """
    # Shift in radians
    shift_radians = np.deg2rad(frame) % (2 * np.pi)
    # Only RA changes between frames; declinations are reused as they are.
    sc_hd.set_offsets(np.column_stack((_wrap_ra(raHd, shift_radians), decHd)))
    sc_exo.set_offsets(np.column_stack((_wrap_ra(raExo, shift_radians), decExo)))
    return sc_hd, sc_exo
# Optional: uncomment the two lines below to build and save the rotating-sky
# animation. `update` is then called with frame values 0, 2, ..., 358 and the
# result is written to stars_animation.mp4 using the 'ffmpeg' writer
# (NOTE(review): presumably requires ffmpeg to be installed — confirm).
#ani = animation.FuncAnimation(fig, update, frames=np.arange(0, 360, 2), interval = 100, blit=True, repeat=True)
#ani.save('stars_animation.mp4', writer='ffmpeg', dpi=500)
# Show the static figure.
plt.show()